* ----------------------------------------------------------------------
* Session setup: start from an empty session, switch off output paging,
* define the project directories and open the text log.
* ----------------------------------------------------------------------
clear all
set more off

* Directory globals used throughout this do-file
global data     "R:\SharedProjects\Shared2020-070\2016\extend_to_2020\JPE_Replication_dta"
global in       "R:\SharedProjects\Shared2020-070\2016\input"
global rand     "R:\Public\Contributions\Rand\RandHrs2018V1\stata"
global rawdata  "R:\Public\2007mail\final\2007dvs\stata"
global figures  "R:\SharedProjects\Shared2020-070\2016\extend_to_2020\JPE_Replication_log"

* Close any log left open by an earlier run, then start a fresh text log
capture log close
log using "$figures\F_file", replace text

**********************
** Data preparation **
**********************

*** 2007 data
* this survey was administered by mail
* there are 2 versions: A and B
* they differ in the order of questions and in the gender assigned to the hypothetical persons
* men and women got the same questionnaires

/* Wording: ... In terms of their
         age, their education, and their work histories, you should imagine that these
         men or women are similar to yourself.  Other than the conditions explicitly
         mentioned, you should imagine the individual is in reasonably good health.
*/	


* ----------------------------------------------------------------------
* Build health_occup.dta: one record per hhidpn with two indicators,
*   health_coarse = 1 if any listed occupation variable has a code in 300-365
*   health_strict = 1 if any listed occupation variable has a code in 300-354
* NOTE(review): the variable names suggest these code ranges identify
* health-related occupations - confirm against the codebook.
* ----------------------------------------------------------------------
cd "$data"

use "R:\Restricted\Industry and Occupation\stata\IOXWAVE20.dta", clear
keep hhid pn k*

* Numeric person identifier built from the household id and person number
egen hhidpn = concat(hhid pn)
drop hhid pn
destring hhidpn, replace

local occvars "kj060 kj062 kj168 kk010 kl015 kmw201_1 kmw201_2 kmw201_3 kmw201_4 kmw201_5"

* Create one coarse (c*) and one strict (s*) flag per occupation variable and
* remember their names. The flags are created interleaved (c, s, c, s, ...),
* so a positional range such as skj060-skmw201_5 would also pick up the
* coarse flags lying in between; the explicit lists below avoid that.
local coarse_flags
local strict_flags
foreach x of local occvars {
	gen c`x' = `x'>=300 & `x'<=365
	gen s`x' = `x'>=300 & `x'<=354
	local coarse_flags `coarse_flags' c`x'
	local strict_flags `strict_flags' s`x'
}

* Number of occupation variables that fall in each range
egen sumc = rowtotal(`coarse_flags')
egen sums = rowtotal(`strict_flags')

gen health_coarse = sumc>=1
gen health_strict = sums>=1

keep hhidpn health_coarse health_strict
sort hhidpn
save health_occup, replace


*************************************
* Load the wide person-level file and reshape it to one row per person-wave.
* clear is the documented option of use for discarding the data in memory.
use "$data\temp1", clear

* Insert an underscore between the stub and the wave suffix of the three
* rprprm variables so that the names match the stubs rprprm1_, rprprm2_ and
* rprprm3_ used in the reshape below.
forvalues j = 1/15 {
	foreach k in 1 2 3 {
		ren rprprm`k'`j' rprprm`k'_`j'
	}
}

* Stubs of the wave-varying variables (same list and order as before)
local stubs rmstat     rdiab    rdoctim     rissdi      rhiothp   rjcocc     rjlasty          rtotmd 	///
 rproxy    rcancr     roopmd      risdi       rjcoccb     rinlbrf     interview_date 					///
riwstat     rlung    roopmdo      hitot      rmrprem    rjcoccc     rhlthlm     rhlthlm_direct 			///
ragey_b    rheart     rpmbmi      rissi      rprpcnt     rjcind     rwalkra     rhlth_temp 				///
riwendm    rstrok     hastck     rhenum      rprprm1_     rjlten     rstoopa      rcantwork 			///
riwendy    rpsych     habond     rhigov      rprprm2_     rjlocc     rdressa     					 	///
  rshlt    rarthr     hachck     rgovmr      rprprm3_    rjlocca rjloccb     rbeda 						///
   rbmi     rhosp       hacd     rgovmd        rlbrf    rjloccc     rmealsa 							///
  rhibp    rhspnit     hatotb     rgovva       rjcten     rjlind     rshopa  inw

reshape long `stubs', i(hhidpn) j(wave)

* Race indicator: 1 if raracem is 1, 0 if raracem is 2 or 3, missing otherwise
gen white = (raracem == 1) if inlist(raracem, 1, 2, 3)
label define white 1 "White" 0 "Non-white"
label values white white

* Gender indicator: 1 if ragender is 2, 0 if ragender is 1, missing otherwise
gen female = (ragender == 2) if inlist(ragender, 1, 2)
label define female 1 "female" 0 "male"
label values female female

* Education indicator: 0 for raeduc 1-3, 1 for raeduc 4-5, missing otherwise
gen college = inlist(raeduc, 4, 5) if inlist(raeduc, 1, 2, 3, 4, 5)
label define college 0 "At most High School degree" 1 "Some college or more"
label values college college

/* Only people in wave 8 (2006) got asked the DVS questions */
keep if wave == 8 & inw == 1

* Health limits work, cannot work at all, and the limitation is not temporary
gen cantwork_notemp = (rhlthlm == 1) & (rcantwork == 1) & (rhlth_temp == 0)

* One dummy per race category: racex1, racex2, ...
tabulate raracem, generate(racex)

* Single summary condition code. It is 0 only when all eight conditions are
* reported as 0; after that each condition overwrites the code in turn, so
* a respondent with several conditions keeps the highest applicable code.
gen bs_sf = 0 if rhibp==0 & rpsych==0 & rheart==0 & rarthr==0  & rdiab==0 & rlung==0 & rstrok==0 & rcancr==0
local code = 0
foreach cond in rhibp rpsych rheart rarthr rdiab rlung rstrok rcancr {
	local ++code
	replace bs_sf = `code' if `cond' == 1
}
label def bs_sf  0 "None" 1 "High BP" 2 "Psych.cond." 3 "Heart" 4 "Arthritis" 5 "Diabetis" ///
				6 "Lung disease" 7 "Stroke" 8 "Cancer" ,replace
label values bs_sf bs_sf

* One dummy per value 1-8 of bs_sf (0 when bs_sf is missing)
local code = 0
foreach tag in hbp psy hea art dia lun str can {
	local ++code
	gen doctor_told_has_`tag' = bs_sf == `code'
}

* Where rjlocc is coded .b, fill it in from rjloccb using the mapping below
* (new rjlocc code <- rjloccb code or codes).
replace rjlocc = 1 if rjlocc == .b & inlist(rjloccb, 1, 2, 3)
replace rjlocc = 2 if rjlocc == .b & inlist(rjloccb, 4, 5, 6, 7, 8, 9, 10, 11)
foreach pair in "3 17" "4 18" "5 15" "6 13" "7 14" "8 12" "9 16" "10 19" "11 22" {
	gettoken newcode oldcode : pair
	replace rjlocc = `newcode' if rjlocc == .b & rjloccb == `oldcode'
}
replace rjlocc = 12 if rjlocc == .b & inlist(rjloccb, 20, 21)
foreach pair in "13 23" "15 24" "17 25" {
	gettoken newcode oldcode : pair
	replace rjlocc = `newcode' if rjlocc == .b & rjloccb == `oldcode'
}

* Stata treats missing values as larger than any number, so this line also
* sets every remaining missing rjlocc to 0.
replace rjlocc = 0 if rjlocc > 3000

rename rjlocc longest_occ_combined

* Condition reports: fold the two dispute codes into 1/0 and drop the r prefix
*   3 = disputes previous report and now has condition      -> 1
*   4 = disputes previous report and now does not           -> 0
foreach cond in hibp diab cancr lung heart strok psych arthr {
	recode r`cond' (3 = 1) (4 = 0)
	rename r`cond' `cond'
}

* Marital status dummies; they stay missing only where marital_status is .
rename rmstat marital_status

gen married = (marital_status == 1) if marital_status != .
label define married 1 "married" 0 "unmarried"
label values married married

gen widowed = (marital_status == 7) if marital_status != .
label define widowed 1 "widowed" 0 "not widowed"
label values widowed widowed


* ADL recode: 1 if the r-prefixed source variable equals 1, 0 otherwise
* (missing source values are therefore coded 0)
foreach i in walkra dressa stoopa beda hosp {
	gen `i' = (r`i' == 1)
}

ren rbmi bmi
ren rproxy proxy

* Age at the wave-8 interview: interview year minus birth year
gen year = year(interview_date)
gen age = year - rabyear

ren rissdi received_ssidi

keep hhidpn college racex* longest_occ_combined proxy cantwork_notemp hibp diab cancr lung heart strok psych arthr bmi married widowed walkra dressa stoopa beda hosp age bs_sf doctor* received_ssidi

* Attach the occupation-based health indicators. The data in memory hold one
* wave-8 row per hhidpn; merge 1:1 stops with an error if health_occup does
* not also identify persons uniquely, where the old-style merge would have
* paired duplicate keys silently.
merge 1:1 hhidpn using health_occup
tab _merge
* Keep everyone in the wave-8 sample, matched or not
drop if _merge==2
drop _merge

sort hhidpn
save temp1_DVS, replace


**** EXPERIENCE
* Build experience.dta: the 2007 rows of hhidpn / year / experience, plus
* experience_max, which falls back to the person's maximum experience over
* all years when the 2007 value is missing.
use "$data\complete_data_final_v1", clear
keep hhidpn year experience
duplicates drop hhidpn year experience, force

* Person-level maximum of experience across all available rows
egen maxexp = max(experience), by(hhidpn)
generate experience_max = cond(year == 2007 & experience == ., maxexp, experience)

keep if year == 2007
drop maxexp

sort hhidpn year
save "$data\experience.dta", replace
*****




/* Now vignette data: questionnaire version A */

cd "$data"
use "$rawdata\DVS07A_R.dta", clear
rename *, lower

* Respondent characteristics
rename (a35b_07 a35a_07) (yob sex)

* Ratings of the respondent's own health (questions a01-a07)
rename (a01_07 a02_07 a03_07 a04_07 a05_07 a06_07 a07_07) ///
	(selfrate_pain selfrate_sleep selfrate_mobility selfrate_memory ///
	 selfrate_breath selfrate_depressed selfrate_hlthlm)

* Ratings of hypothetical persons; in version A two domains alternate
* question by question (a08-a13, a14-a19, a20-a25)
rename (a08_07 a10_07 a12_07) (otherrate_pain1 otherrate_pain2 otherrate_pain3)
rename (a09_07 a11_07 a13_07) (otherrate_sleep1 otherrate_sleep2 otherrate_sleep3)
rename (a14_07 a16_07 a18_07) (otherrate_mobility1 otherrate_mobility2 otherrate_mobility3)
rename (a15_07 a17_07 a19_07) (otherrate_memory1 otherrate_memory2 otherrate_memory3)
rename (a20_07 a22_07 a24_07) (otherrate_breath1 otherrate_breath2 otherrate_breath3)
rename (a21_07 a23_07 a25_07) (otherrate_depressed1 otherrate_depressed2 otherrate_depressed3)

* Work-limitation vignettes (a26-a34)
rename (a26_07 a27_07 a28_07) ///
	(otherrate_pain_hlthlm1 otherrate_pain_hlthlm2 otherrate_pain_hlthlm3)
rename (a29_07 a30_07 a31_07) ///
	(otherrate_depressed_hlthlm1 otherrate_depressed_hlthlm2 otherrate_depressed_hlthlm3)
rename (a32_07 a33_07 a34_07) ///
	(otherrate_cardiov_hlthlm1 otherrate_cardiov_hlthlm2 otherrate_cardiov_hlthlm3)

* Tag the questionnaire version and park the file until version B is read
gen version = "A"
tempfile temp
save `temp'

* Questionnaire version B: same content as version A with the questions in
* a different order, so the b-numbers map to the common names differently.
use "$rawdata\DVS07B_R.dta", clear
rename *, lower

* Respondent characteristics
rename (b35b_07 b35a_07) (yob sex)

* Ratings of the respondent's own health (questions b01-b07)
rename (b06_07 b05_07 b04_07 b03_07 b02_07 b01_07 b07_07) ///
	(selfrate_pain selfrate_sleep selfrate_mobility selfrate_memory ///
	 selfrate_breath selfrate_depressed selfrate_hlthlm)

* Ratings of hypothetical persons (b08-b25)
rename (b25_07 b23_07 b21_07) (otherrate_pain1 otherrate_pain2 otherrate_pain3)
rename (b24_07 b22_07 b20_07) (otherrate_sleep1 otherrate_sleep2 otherrate_sleep3)
rename (b19_07 b17_07 b15_07) (otherrate_mobility1 otherrate_mobility2 otherrate_mobility3)
rename (b18_07 b16_07 b14_07) (otherrate_memory1 otherrate_memory2 otherrate_memory3)
rename (b13_07 b11_07 b09_07) (otherrate_breath1 otherrate_breath2 otherrate_breath3)
rename (b12_07 b10_07 b08_07) (otherrate_depressed1 otherrate_depressed2 otherrate_depressed3)

* Work-limitation vignettes (b26-b34)
rename (b34_07 b33_07 b32_07) ///
	(otherrate_pain_hlthlm1 otherrate_pain_hlthlm2 otherrate_pain_hlthlm3)
rename (b31_07 b30_07 b29_07) ///
	(otherrate_depressed_hlthlm1 otherrate_depressed_hlthlm2 otherrate_depressed_hlthlm3)
rename (b28_07 b27_07 b26_07) ///
	(otherrate_cardiov_hlthlm1 otherrate_cardiov_hlthlm2 otherrate_cardiov_hlthlm3)

* Stack both versions into one file
gen version = "B"
append using `temp'

* survey was mailed out in the Fall of 2007
gen year = 2007

* Numeric person identifier built from the household id and person number
egen hhidpn = concat(hhid pn)
destring hhidpn, replace
drop hhid pn

* Respondent gender: 1 if sex is 2, 0 if sex is 1, missing otherwise
gen female = (sex == 2) if inlist(sex, 1, 2)
drop sex

order hhidpn female yob selfrate_hlthlm

* add gender of fictitious person
* Each otherrate_* vignette gets a companion *_f flag. The values are first
* set to the gender used in the 2007 version A questionnaire and then
* flipped for version B respondents.
local flags

* Domain vignettes: the three vignettes of a domain share one value
local domains pain sleep mobility memory breath depressed
local domfem  0    1     0        1      0      1
forvalues d = 1/6 {
	local dom : word `d' of `domains'
	local fem : word `d' of `domfem'
	forvalues k = 1/3 {
		gen otherrate_`dom'`k'_f = `fem'
		local flags `flags' otherrate_`dom'`k'_f
	}
}

* Work-limitation vignettes: the value alternates within a domain. oddfem is
* the value for vignettes 1 and 3; vignette 2 gets the opposite value.
local stems  pain_hlthlm depressed_hlthlm cardiov_hlthlm
local oddfem 1           0                1
forvalues s = 1/3 {
	local stem : word `s' of `stems'
	local odd  : word `s' of `oddfem'
	forvalues k = 1/3 {
		local fem = cond(mod(`k', 2) == 1, `odd', 1 - `odd')
		gen otherrate_`stem'`k'_f = `fem'
		local flags `flags' otherrate_`stem'`k'_f
	}
}

* 2007 version B has opposite gender as version A
foreach v of local flags {
	replace `v' = 1 - `v' if version == "B"
}

* hhidpn was already converted to numeric when it was created above, so this
* call leaves the data unchanged
destring hhidpn, replace


**** MAKE FINAL VIGNETTE DATA
cd $data

* change to long format
* Number the nine work-limitation vignettes 1-9 (pain 1-3, depressed 4-6,
* cardiov 7-9) so that reshape can stack answers and gender flags.
local id = 0
foreach topic in pain depressed cardiov {
	forvalues k = 1/3 {
		local ++id
		rename otherrate_`topic'_hlthlm`k'   vign_`id'
		rename otherrate_`topic'_hlthlm`k'_f vign_f_`id'
	}
}

reshape long vign_ vign_f_, i(hhidpn year) j(vign_id)

* Vignette topic as a string and as a numeric group code; group() numbers the
* distinct values in sorted order
gen vign_topic = cond(vign_id <= 3, "pain", cond(vign_id <= 6, "depressed", "cardiov"))
egen vign_topic2 = group(vign_topic)
rename (vign_topic vign_topic2) (vign_topic_string vign_topic)

rename (vign_ vign_f_) (vign_answer vign_f)

* Age in the mail-survey year: survey year minus reported year of birth
gen age_DVS = year - yob

* NOTE(review): this recreates vign_topic2 from the numeric vign_topic; it
* is not referenced again before the final keep in this file.
egen vign_topic2 = group(vign_topic)

xtset hhidpn

gen vignxfem = vign_f*female
label var vignxfem "vign_f x female"

* Drop rows with a missing key variable. missing() also catches extended
* missing values (.a to .z); a comparison against . alone would let those
* through, and they would then count as answered below and satisfy
* vign_answer > 3, because Stata orders every missing value above all numbers.
keep if !missing(female, vign_f, selfrate_hlthlm, vign_topic, vign_answer)

* keep only those individuals who respond to all vignette questions
* (nine rows per respondent-year after the reshape to long format)
gen ind = 1
bysort hhidpn year: egen count_answers = total(ind)
drop if count_answers < 9
drop ind

** descriptive stats
* Answers split by the gender of the hypothetical person
gen vign_answerf = vign_answer if vign_f != 0
gen vign_answerm = vign_answer if vign_f != 1

* Respondent-level mean rating by vignette gender, and their simple average
bysort hhidpn year: egen rate_f = mean(vign_answerf)
bysort hhidpn year: egen rate_m = mean(vign_answerm)
gen rate = 0.5*rate_f + 0.5*rate_m

* Flag one row per respondent-year
bysort hhidpn year: gen ind = _n == 1

lab var rate " \ \ \ \ overall"
lab var rate_f " \ \ \ \ female hypoth. person"
lab var rate_m " \ \ \ \ male hypoth. person"
lab var selfrate_hlthlm "Rating of own health cond."
* No variable named age is created in this file before this point, so the
* former abbreviated reference resolved to age_DVS; name it explicitly.
* NOTE(review): the variable age merged in later from temp1_DVS stays
* unlabeled - confirm which variable was meant.
lab var age_DVS "Age"

lab def female 1 "Female R" 0 "Male R", replace
lab val female female

* NOTE(review): this overrides the label given to rate a few lines above;
* confirm which of the two texts is intended.
lab var rate " \ \  own gender hypoth. person"

*** binary: whether gave answer of 4 or 5
gen vign_severe = vign_answer > 3

* Attach the 2006 (wave 8) covariates and keep matched respondents only.
/* 1 individual in vignettes not matched with 2006 wave - it was a mail survey
   that some people did not return (death, attrition, or in 2 of the 3 other
   mail survey) */
sort hhidpn
merge m:1 hhidpn using temp1_DVS, keep(match) nogenerate


**************************** SAMPLE SELECTION: aged 20-65, not proxy respondent, not in receipt of DI/SSI
keep if inrange(age, 20, 65) & proxy == 0 & received_ssidi == 0
********************************************

* One dummy per longest-occupation code
tabulate longest_occ_combined, generate(longestoccx)

* Intermediate occupation grouping
generate occ_interm = .
replace occ_interm = 1 if longest_occ_combined == 0				/*Unknown*/
replace occ_interm = 2 if inrange(longest_occ_combined, 1, 2)		/*Manag/Prof*/
replace occ_interm = 3 if inrange(longest_occ_combined, 3, 4)		/*Sales/clerical*/
replace occ_interm = 4 if inrange(longest_occ_combined, 5, 7)		/*Clean/Protect/Food serv*/
replace occ_interm = 5 if inrange(longest_occ_combined, 8, 9)		/*Personal/Health serv*/
replace occ_interm = 6 if inrange(longest_occ_combined, 10, 12)	/*Farm/Constr/Mech*/
replace occ_interm = 7 if inlist(longest_occ_combined, 13, 17)		/*Precision/Armed force*/
replace occ_interm = 8 if inrange(longest_occ_combined, 14, 16)	/*operators*/

keep if year==2007

* Attach 2007 work experience. The data in memory hold several vignette rows
* per respondent-year, so this is a many-to-one merge. merge m:1 stops with
* an error if experience.dta has more than one row per hhidpn and year,
* where the old-style merge would have paired such rows silently.
* Respondents without an experience record are kept.
merge m:1 hhidpn year using "$data\experience", keep(master match) nogenerate

keep hhidpn year female college racex2 doctor* married widowed age  walkra dressa stoopa beda hosp bmi occ_interm vign_f vign_id vign_severe cantwork_notemp proxy longestoccx* health* experience*

* Fix the row order so that the saved file is identical from run to run
sort hhidpn year vign_id
save "$data\vignette_data", replace


********************
* Remove the intermediate files created earlier in this do-file
erase temp1_DVS.dta
erase health_occup.dta

log close
